## Title: Simulation for LAJE vs iTSLS
## Date: 2016-09-10
## Author: Matthew Blackwell

library(AER)
library(stargazer)
library(stats)
source("joint-iv-utils.R")

true.laje <- 3
Ns <- c(200, 1000, 2500)
cc.probs <- c(0.8,0.6,0.4,0.2)
sims <- 1000

param.table <- expand.grid(Ns, cc.probs)
num.params <- nrow(param.table)

laje.hold <- matrix(NA, sims, num.params)
laje.se.hold <- matrix(NA, sims, num.params)
lacde0.hold <- matrix(NA, sims, num.params)
lacde0.se.hold <- matrix(NA, sims, num.params)
lacde1.hold <- matrix(NA, sims, num.params)
lacde1.se.hold <- matrix(NA, sims, num.params)
tsls.laje.hold <- matrix(NA, sims, num.params)
tsls.laje.ci.hi <- matrix(NA, sims, num.params)
tsls.laje.ci.lo <- matrix(NA, sims, num.params)
tsls.lacde0.hold <- matrix(NA, sims, num.params)
tsls.lacde0.ci.hi <- matrix(NA, sims, num.params)
tsls.lacde0.ci.lo <- matrix(NA, sims, num.params)
tsls.late1.hold <- matrix(NA, sims, num.params)
tsls.int.hold <- matrix(NA, sims, num.params)
tsls.int.se.hold <- matrix(NA, sims, num.params)
tsls.param <- matrix(NA, sims, num.params)

cc.effects <- c(1, -0.7, 0.1, -0.1, 0.2, 0.2, -0.4, 0.2, 0.7)
names(cc.effects) <- c("cc", "ca", "cn", "aa", "ac", "an", "nn", "nc", "na")

true.lacde0 <- rep(NA, times = nrow(param.table))
tsls.qoi0 <- rep(NA, times = nrow(param.table))

set.seed(02143)
for (p in 1:nrow(param.table)) {
  N <- param.table[p, 1]
  cc.prob <- param.table[p, 2]
  rho <- c(cc.prob, rep((1 - cc.prob)/8, 8))
  names(rho) <- c("cc", "ca", "cn", "aa", "ac", "an", "nn", "nc", "na")
  ## rho[c("ca", "aa", "ac", "an", "na")] <- 0
  ## rho <- rho/sum(rho)
  true.lacde0[p] <- (cc.effects["cc"] * rho["cc"] +
                       cc.effects["cn"] * rho["cn"])/(rho["cc"] + rho["cn"])
  ww <- (rho["cc"] + rho["cn"])/sum(rho[c("cc", "cn", "ca")])
  bias.term <- cc.effects["cc"] + 2 * cc.effects["ca"] - 2 * cc.effects["cc"]
  tsls.qoi0[p] <- ww * true.lacde0[p] + (1 - ww) * bias.term
  for (i in 1:sims) {
    c1 <- t(rmultinom(N, 1, rho))
    colnames(c1) <- c("cc", "ca", "cn", "aa", "ac", "an", "nn", "nc", "na")

    z1 <- sample(rep(c(0,1,1,1,1), N/5))
    z2 <- sample(rep(c(0,0,1,1,0), N/5))

    d1 <- rowSums(c1[,c("cc", "ca", "cn")]) * z1 + rowSums(c1[,c("aa", "ac", "an")])
    d2 <- rowSums(c1[,c("cc", "ac", "nc")]) * z2 + rowSums(c1[,c("aa", "ca", "na")])

    beta.0 <- rnorm(N, rowSums(t(cc.effects * t(c1))), 1)
    beta.d1 <- rnorm(N, rowSums(t(cc.effects * t(c1))), 1)
    beta.d2 <- rnorm(N, rowSums(t(cc.effects * t(c1))), 1)
    beta.int <- rnorm(N, rowSums(t(cc.effects * t(c1))), 1)
    epsilon <- rnorm(N, 0, 1)

    y00 <- beta.0 + epsilon
    y10 <- beta.0 + beta.d1 + epsilon
    y01 <- beta.0 + beta.d2 + epsilon
    y11 <- beta.0 + beta.d1 + beta.d2 + beta.int + epsilon
    y <- y00 + d1 * (y10-y00) + d2 * (y01-y00) +
      d1 * d2 * ((y11 - y01) - (y10 - y00))

    out <- joint.iv(y, d1, d2, z1, z2)
    laje.hold[i, p] <- out$tau["laje"]
    laje.se.hold[i, p] <- out$se["laje"]
    lacde0.hold[i, p] <- out$tau["cc.cn"]
    lacde0.se.hold[i, p] <- out$se["cc.cn"]
    lacde1.hold[i, p] <- out$tau["cc.ca"]
    lacde1.se.hold[i, p] <- out$se["cc.ca"]

    #tsls.mod <- ivreg(y ~ I(d1*(1-d2)) + I(d2*(1-d1)) + d1:d2| I(z1*(1-z2)) + I(z2*(1-z1)) + z1:z2)
    tsls.mod <- ivreg(y ~ d1*d2| z1*z2)
    tsls.laje.hold[i, p] <- coef(tsls.mod)["d1"] + coef(tsls.mod)["d2"] + coef(tsls.mod)["d1:d2"]
    tmp <- vcov(tsls.mod)
    this.se <- sqrt(tmp["d1","d1"] + tmp["d2","d2"] + tmp["d1:d2","d1:d2"] + 2*tmp["d1","d2"] + 2*tmp["d1","d1:d2"] + 2*tmp["d2","d1:d2"])
    tsls.laje.ci.hi[i, p] <- tsls.laje.hold[i, p] + qt(0.975, df = N-4)*this.se
    tsls.laje.ci.lo[i, p] <- tsls.laje.hold[i, p] - qt(0.975, df = N-4)*this.se

    tsls.int.hold[i, p] <- coef(tsls.mod)["d1:d2"]
    tsls.int.se.hold[i, p] <- sqrt(tmp["d1:d2", "d1:d2"])
    tsls.lacde0.hold[i, p] <- coef(tsls.mod)["d1"]
    tmp <- vcov(tsls.mod)
    this.se <- sqrt(tmp["d1","d1"])
    tsls.lacde0.ci.hi[i, p] <- tsls.lacde0.hold[i, p] + qt(0.975, df = N-2)*this.se
    tsls.lacde0.ci.lo[i, p] <- tsls.lacde0.hold[i, p] - qt(0.975, df = N-2)*this.se

  }
}

laje.ci.hi <- laje.hold + 1.96 * laje.se.hold
laje.ci.lo <- laje.hold - 1.96 * laje.se.hold
laje.coverage <- matrix(colMeans(true.laje > laje.ci.lo & true.laje < laje.ci.hi), nrow = length(Ns))
laje.bias <- matrix(colMeans(laje.hold-true.laje), nrow = length(Ns))
laje.rmse <- matrix(sqrt(colMeans((laje.hold - true.laje)^2)), nrow = length(Ns))
tsls.laje.bias <- matrix(colMeans(tsls.laje.hold - true.laje), nrow = length(Ns))
tsls.laje.rmse <- matrix(sqrt(colMeans((tsls.laje.hold - true.laje)^2)), nrow = length(Ns))
tsls.laje.coverage <- matrix(colMeans(tsls.laje.ci.lo < true.laje & tsls.laje.ci.hi > true.laje), nrow = length(Ns))

true.lacde0.mat <- matrix(true.lacde0, nrow = sims, ncol= 12, byrow=TRUE)
lacde0.ci.hi <- lacde0.hold + 1.96 * lacde0.se.hold
lacde0.ci.lo <- lacde0.hold - 1.96 * lacde0.se.hold
lacde0.coverage <- matrix(colMeans(true.lacde0.mat > lacde0.ci.lo & true.lacde0.mat < lacde0.ci.hi), nrow = length(Ns))
lacde0.bias <- matrix(colMeans(lacde0.hold-true.lacde0.mat), nrow = length(Ns))
lacde0.rmse <- matrix(sqrt(colMeans((lacde0.hold - true.lacde0.mat)^2)), nrow = length(Ns))
tsls.lacde0.bias <- matrix(colMeans(tsls.lacde0.hold - true.lacde0.mat), nrow = length(Ns))
tsls.lacde0.rmse <- matrix(sqrt(colMeans((tsls.lacde0.hold - true.lacde0.mat)^2)), nrow = length(Ns))
tsls.lacde0.coverage <- matrix(colMeans(tsls.lacde0.ci.lo < true.lacde0.mat & tsls.lacde0.ci.hi > true.lacde0.mat), nrow = length(Ns))
true.lacde0.mat <- matrix(true.lacde0, nrow = sims, ncol= 12, byrow=TRUE)

tsls.lacde0.bias
matrix(tsls.qoi0-true.lacde0, nrow = length(Ns))

proposed <- data.frame(N=Ns,bias80=laje.bias[,1], RMSE80=laje.rmse[,1], coverage80=laje.coverage[,1], bias60=laje.bias[,2], RMSE60=laje.rmse[,2], coverage60=laje.coverage[,2], bias40=laje.bias[,3], RMSE40=laje.rmse[,3], coverage40=laje.coverage[,3])
tsls.est <- data.frame(N=Ns,bias80=tsls.laje.bias[,1], RMSE80=tsls.laje.rmse[,1], coverage80=tsls.laje.coverage[,1], bias60=tsls.laje.bias[,2], RMSE60=tsls.laje.rmse[,2], coverage60=tsls.laje.coverage[,2], bias40=tsls.laje.bias[,3], RMSE40=tsls.laje.rmse[,3], coverage40=tsls.laje.coverage[,3])
tab <- rbind(proposed, tsls.est)
tab
rownames(tab) <- c("", "LAJE", " ", "  ", "iTSLS", "    ")
stargazer(as.matrix(tab[,-c(5,6,7)]), float = FALSE)


lacde0.tab <- data.frame(N=Ns,bias80=lacde0.bias[,1], RMSE80=lacde0.rmse[,1], coverage80=lacde0.coverage[,1], bias60=lacde0.bias[,2], RMSE60=lacde0.rmse[,2], coverage60=lacde0.coverage[,2], bias40=lacde0.bias[,3], RMSE40=lacde0.rmse[,3], coverage40=lacde0.coverage[,3])
tsls.lacde0.est <- data.frame(N=Ns,bias80=tsls.lacde0.bias[,1], RMSE80=tsls.lacde0.rmse[,1], coverage80=tsls.lacde0.coverage[,1], bias60=tsls.lacde0.bias[,2], RMSE60=tsls.lacde0.rmse[,2], coverage60=tsls.lacde0.coverage[,2], bias40=tsls.lacde0.bias[,3], RMSE40=tsls.lacde0.rmse[,3], coverage40=tsls.lacde0.coverage[,3])
lacde0.tab <- rbind(lacde0.tab, tsls.lacde0.est)
lacde0.tab
rownames(lacde0.tab) <- c("", "LACDE(0)", " ", "  ", "TSLS", "    ")
stargazer(as.matrix(lacde0.tab[,-c(5,6,7)]), float = FALSE)
